From b9ed424f286ecb80aa668d3ff9e387fdb89b03e0 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Tue, 7 Jun 2005 15:30:17 +0000 Subject: [PATCH] bitkeeper revision 1.1691 (42a5bd892a-21ifB8kNwgNvEid-K_Q) Reove IRQ balancer from Xen. It is unused, and balancing will be done by the guests themselves. Signed-off-by: Keir Fraser --- xen/arch/x86/io_apic.c | 437 ----------------------------------------- 1 file changed, 437 deletions(-) diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c index a0020c7ec2..c582b7679f 100644 --- a/xen/arch/x86/io_apic.c +++ b/xen/arch/x86/io_apic.c @@ -232,440 +232,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) spin_unlock_irqrestore(&ioapic_lock, flags); } -#if defined(CONFIG_IRQBALANCE) -# include /* kernel_thread() */ -# include /* kstat */ -# include /* kmalloc() */ -# include /* time_after() */ - -# ifdef CONFIG_BALANCED_IRQ_DEBUG -# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0) -# define Dprintk(x...) do { TDprintk(x); } while (0) -# else -# define TDprintk(x...) -# define Dprintk(x...) -# endif - -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; - -#define IRQBALANCE_CHECK_ARCH -999 -static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; -static int physical_balance = 0; - -struct irq_cpu_info { - unsigned long * last_irq; - unsigned long * irq_delta; - unsigned long irq; -} irq_cpu_data[NR_CPUS]; - -#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq]) - -#define IDLE_ENOUGH(cpu,now) \ - (idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1)) - -#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) - -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) - -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) - -long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL; - -static unsigned long move(int curr_cpu, cpumask_t allowed_mask, - unsigned long now, int direction) -{ - int search_idle = 1; - int cpu = curr_cpu; - - goto inside; - - do { - if (unlikely(cpu == curr_cpu)) - search_idle = 0; -inside: - if (direction == 1) { - cpu++; - if (cpu >= NR_CPUS) - cpu = 0; - } else { - cpu--; - if (cpu == -1) - cpu = NR_CPUS-1; - } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu,now))); - - return cpu; -} - -static inline void balance_irq(int cpu, int irq) -{ - unsigned long now = jiffies; - cpumask_t allowed_mask; - unsigned int new_cpu; - - if (irqbalance_disabled) - return; - - cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); - new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) { - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); - spin_unlock_irqrestore(&desc->lock, flags); - } -} - -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) -{ - int i, j; - Dprintk("Rotating IRQs among CPUs.\n"); - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { - if (!irq_desc[j].action) - continue; - /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) < - useful_load_threshold) - continue; - balance_irq(i, j); - } - } - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; -} - -static void do_irq_balance(void) -{ - int i, j; - unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); - unsigned long move_this_load = 0; - int max_loaded = 0, min_loaded = 0; - int load; - unsigned long useful_load_threshold = balanced_irq_interval + 10; - int selected_irq; - int tmp_loaded, first_attempt = 1; - unsigned long tmp_cpu_irq; - unsigned long imbalance = 0; - cpumask_t allowed_mask, target_cpu_mask, tmp; - - for (i = 0; i < NR_CPUS; i++) { - int package_index; - CPU_IRQ(i) = 0; - if (!cpu_online(i)) - continue; - package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < NR_IRQS; j++) { - unsigned long value_now, delta; - /* Is this an active IRQ? */ - if (!irq_desc[j].action) - continue; - if ( package_index == i ) - IRQ_DELTA(package_index,j) = 0; - /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_cpu(i).irqs[j]; - - /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i,j); - - /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i,j) = value_now; - - /* Ignore IRQs whose rate is less than the clock */ - if (delta < useful_load_threshold) - continue; - /* update the load for the processor or package total */ - IRQ_DELTA(package_index,j) += delta; - - /* Keep track of the higher numbered sibling as well */ - if (i != package_index) - CPU_IRQ(i) += delta; - /* - * We have sibling A and sibling B in the package - * - * cpu_irq[A] = load for cpu A + load for cpu B - * cpu_irq[B] = load for cpu B - */ - CPU_IRQ(package_index) += delta; - } - } - /* Find the least loaded processor package */ - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (min_cpu_irq > CPU_IRQ(i)) { - min_cpu_irq = CPU_IRQ(i); - min_loaded = i; - } - } - max_cpu_irq = ULONG_MAX; - -tryanothercpu: - /* Look for heaviest loaded processor. - * We may come back to get the next heaviest loaded processor. - * Skip processors with trivial loads. - */ - tmp_cpu_irq = 0; - tmp_loaded = -1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (max_cpu_irq <= CPU_IRQ(i)) - continue; - if (tmp_cpu_irq < CPU_IRQ(i)) { - tmp_cpu_irq = CPU_IRQ(i); - tmp_loaded = i; - } - } - - if (tmp_loaded == -1) { - /* In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original - * approach to rotate them around. - */ - if (!first_attempt && imbalance >= useful_load_threshold) { - rotate_irqs_among_cpus(useful_load_threshold); - return; - } - goto not_worth_the_effort; - } - - first_attempt = 0; /* heaviest search */ - max_cpu_irq = tmp_cpu_irq; /* load */ - max_loaded = tmp_loaded; /* processor */ - imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - Dprintk("max_loaded cpu = %d\n", max_loaded); - Dprintk("min_loaded cpu = %d\n", min_loaded); - Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq); - Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq); - Dprintk("load imbalance = %lu\n", imbalance); - - /* if imbalance is less than approx 10% of max load, then - * observe diminishing returns action. - quit - */ - if (imbalance < (max_cpu_irq >> 3)) { - Dprintk("Imbalance too trivial\n"); - goto not_worth_the_effort; - } - -tryanotherirq: - /* if we select an IRQ to move that can't go where we want, then - * see if there is another one to try. - */ - move_this_load = 0; - selected_irq = -1; - for (j = 0; j < NR_IRQS; j++) { - /* Is this an active IRQ? */ - if (!irq_desc[j].action) - continue; - if (imbalance <= IRQ_DELTA(max_loaded,j)) - continue; - /* Try to find the IRQ that is closest to the imbalance - * without going over. - */ - if (move_this_load < IRQ_DELTA(max_loaded,j)) { - move_this_load = IRQ_DELTA(max_loaded,j); - selected_irq = j; - } - } - if (selected_irq == -1) { - goto tryanothercpu; - } - - imbalance = move_this_load; - - /* For physical_balance case, we accumlated both load - * values in the one of the siblings cpu_irq[], - * to use the same code for physical and logical processors - * as much as possible. - * - * NOTE: the cpu_irq[] array holds the sum of the load for - * sibling A and sibling B in the slot for the lowest numbered - * sibling (A), _AND_ the load for sibling B in the slot for - * the higher numbered sibling. - * - * We seek the least loaded sibling by making the comparison - * (A+B)/2 vs B - */ - load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { - if (load > CPU_IRQ(j)) { - /* This won't change cpu_sibling_map[min_loaded] */ - load = CPU_IRQ(j); - min_loaded = j; - } - } - - cpus_and(allowed_mask, cpu_online_map, irq_affinity[selected_irq]); - target_cpu_mask = cpumask_of_cpu(min_loaded); - cpus_and(tmp, target_cpu_mask, allowed_mask); - - if (!cpus_empty(tmp)) { - irq_desc_t *desc = irq_desc + selected_irq; - unsigned long flags; - - Dprintk("irq = %d moved to cpu = %d\n", - selected_irq, min_loaded); - /* mark for change destination */ - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[selected_irq] = - cpumask_of_cpu(min_loaded); - spin_unlock_irqrestore(&desc->lock, flags); - /* Since we made a change, come back sooner to - * check for more variation. - */ - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; - } - goto tryanotherirq; - -not_worth_the_effort: - /* - * if we did not find an IRQ to move, then adjust the time interval - * upward - */ - balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - Dprintk("IRQ worth rotating not found\n"); - return; -} - -static int balanced_irq(void *unused) -{ - int i; - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - - daemonize("kirqd"); - - /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < NR_IRQS ; i++) { - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); - } - - for ( ; ; ) { - set_current_state(TASK_INTERRUPTIBLE); - time_remaining = schedule_timeout(time_remaining); - try_to_freeze(PF_FREEZE); - if (time_after(jiffies, - prev_balance_time+balanced_irq_interval)) { - do_irq_balance(); - prev_balance_time = jiffies; - time_remaining = balanced_irq_interval; - } - } - return 0; -} - -static int __init balanced_irq_init(void) -{ - int i; - struct cpuinfo_x86 *c; - cpumask_t tmp; - - cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; - /* When not overwritten by the command line ask subarchitecture. */ - if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) - irqbalance_disabled = NO_BALANCE_IRQ; - if (irqbalance_disabled) - return 0; - - /* disable irqbalance completely if there is only one processor online */ - if (num_online_cpus() < 2) { - irqbalance_disabled = 1; - return 0; - } - /* - * Enable physical balance only if more than 1 physical processor - * is present - */ - if (smp_num_siblings > 1 && !cpus_empty(tmp)) - physical_balance = 1; - - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; - irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { - printk(KERN_ERR "balanced_irq_init: out of memory"); - goto failed; - } - memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS); - memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS); - } - - printk(KERN_INFO "Starting balanced_irq\n"); - if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) - return 0; - else - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -failed: - for (i = 0; i < NR_CPUS; i++) { - if(irq_cpu_data[i].irq_delta) - kfree(irq_cpu_data[i].irq_delta); - if(irq_cpu_data[i].last_irq) - kfree(irq_cpu_data[i].last_irq); - } - return 0; -} - -int __init irqbalance_disable(char *str) -{ - irqbalance_disabled = 1; - return 0; -} - -__setup("noirqbalance", irqbalance_disable); - -static inline void move_irq(int irq) -{ - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } -} - -late_initcall(balanced_irq_init); - -#else /* !CONFIG_IRQBALANCE */ -static inline void move_irq(int irq) { } -#endif /* CONFIG_IRQBALANCE */ - -#ifndef CONFIG_SMP -void fastcall send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write_around(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP */ - /* * Find the IRQ entry number of a certain pin. */ @@ -1610,7 +1176,6 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) */ static void ack_edge_ioapic_irq(unsigned int irq) { - move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1643,8 +1208,6 @@ static void mask_and_ack_level_ioapic_irq (unsigned int irq) unsigned long v; int i; - move_irq(irq); - mask_IO_APIC_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 -- 2.30.2